import pandas as pd
import numpy as np

class DataProcessor:
    """Load, reshape and persist the datasets described by a config object.

    The methods read the following attributes from ``config``:
    ``TRAIN_PATH``, ``TEST_PATH``, ``TRAIN_SAMPLES``, ``TEMP_TRAIN_PATH``,
    ``SUBMIT_SAMPLE_PATH`` and ``OUTPUT_PATH``.
    """

    def __init__(self, config):
        """Keep a reference to the configuration object."""
        self.config = config

    def load_data(self):
        """Load the training set and the test set.

        Both files are parsed as tab-separated text. At most
        ``config.TRAIN_SAMPLES`` rows are read from the training file.
        A ``label_ft`` column is added to the training frame holding the
        ``label`` value rendered as a string with a ``__label__`` prefix
        (the format FastText requires).

        Returns:
            A ``(train_df, test_df)`` tuple of DataFrames.
        """
        cfg = self.config
        train_frame = pd.read_csv(
            cfg.TRAIN_PATH,
            sep='\t',
            nrows=cfg.TRAIN_SAMPLES,
        )
        test_frame = pd.read_csv(cfg.TEST_PATH, sep='\t')

        # Convert the labels to the format FastText requires.
        label_as_text = train_frame['label'].astype(str)
        train_frame['label_ft'] = '__label__' + label_as_text

        return train_frame, test_frame

    def save_temp_train_data(self, train_data, train_index):
        """Save a temporary training file for the selected rows.

        Args:
            train_data: DataFrame containing ``text`` and ``label_ft`` columns.
            train_index: Positional row selector handed to ``.iloc``.

        The ``text`` and ``label_ft`` columns of the selected rows are written
        to ``config.TEMP_TRAIN_PATH``, tab-separated, with no header row and
        no index column.
        """
        wanted_columns = train_data[['text', 'label_ft']]
        selected_rows = wanted_columns.iloc[train_index]
        selected_rows.to_csv(
            self.config.TEMP_TRAIN_PATH,
            sep='\t',
            header=None,
            index=False,
        )

    def save_submission(self, predictions):
        """Save the prediction results.

        Args:
            predictions: Values assigned to the ``label`` column of the
                sample submission table.

        The table read from ``config.SUBMIT_SAMPLE_PATH`` gets its ``label``
        column set to ``predictions`` and is then written to
        ``config.OUTPUT_PATH`` without the index column.
        """
        cfg = self.config
        result_table = pd.read_csv(cfg.SUBMIT_SAMPLE_PATH)
        result_table['label'] = predictions
        result_table.to_csv(cfg.OUTPUT_PATH, index=False)